Prepare data

Read and format data


df_ger_covid <- read_csv('Germany_timeseries_prep.csv')
df_ger_socdist <- read_csv('Germany_socdist_fb_kreis.csv')
df_ger_ctrl <- read.csv2('Germany_controls.csv', sep = ';', dec=',')

# prevalence 
df_ger_covid <- df_ger_covid %>% mutate(date = as.Date(date, "%d%b%Y"),
                                  kreis = as.character(kreis)) %>% 
  dplyr::select(-runday, -kreis_name, -ewz, -shape__area, 
                -cumcase, -anzahlfall, -popdens) %>%
  dplyr::rename(pers_o = open, 
         pers_c = sci,
         pers_e = extra,
         pers_a = agree,
         pers_n = neuro)

df_ger_covid

# social distancing
df_ger_socdist <- df_ger_socdist %>% 
  rename(socdist_single_tile = all_day_ratio_single_tile_users) %>%
  select(kreis, date, socdist_single_tile) %>% 
  mutate(kreis = as.character(kreis))

df_ger_socdist
NA
# controls 
df_ger_ctrl <- df_ger_ctrl %>% select(-kreis_nme) %>%
    mutate(kreis = as.character(kreis),
           popdens = popdens %>% 
             as.character() %>%
             str_replace('\\.', '')%>%
             as.numeric())

df_ger_ctrl
NA
NA

Explore data

Plot prevalence over time


df_ger %>% ggplot(aes(x=time, y=rate_day)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall prevalence over time")


pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=rate_day)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}

Plot social distancing (single tile) over time


df_ger %>% ggplot(aes(x=time, y=socdist_single_tile)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall social distancing (single tile) over time")


pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=socdist_single_tile)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}

Control for weekend effect


df_ger_loess <- df_ger %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7')) %>% 
  split(.$kreis) %>%
  map(~ loess(socdist_single_tile ~ time, data = .)) %>%
  map(predict, 1:max(df_ger$time)) %>% 
  bind_rows() %>% 
  gather(key = 'kreis', value = 'loess') %>% 
  group_by(kreis) %>% 
  mutate(time = row_number())

df_ger_loess

df_ger <- df_ger %>% merge(df_ger_loess, by=c('kreis', 'time')) %>% 
  mutate(weekday = format(date, '%u')) %>% 
  mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7'), loess,
                                            socdist_single_tile)) %>%
  arrange(kreis, time) %>% 
  select(-weekday)

df_ger %>% drop_na()
NA

df_ger %>% ggplot(aes(x=time, y=socdist_single_tile_clean)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall social distancing (single tile) over time")


pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=socdist_single_tile_clean)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}


df_ger <- df_ger %>% mutate(socdist_single_tile = socdist_single_tile_clean) %>% 
  select(-loess, -socdist_single_tile_clean)

Correlations


df_ger %>% group_by(kreis) %>% 
  summarize_if(is.numeric, mean) %>% 
  select(-kreis, -time) %>% 
  cor(use='pairwise.complete') %>% 
  round(3)
                    pers_e pers_a pers_c pers_n pers_o rate_day socdist_single_tile  women academics    cdu    afd
pers_e               1.000  0.223  0.255 -0.375  0.277    0.164               0.151 -0.050     0.196  0.090 -0.152
pers_a               0.223  1.000  0.347 -0.379  0.167    0.136              -0.014  0.037     0.233  0.024  0.008
pers_c               0.255  0.347  1.000 -0.373 -0.063   -0.004               0.019 -0.002     0.027 -0.017  0.132
pers_n              -0.375 -0.379 -0.373  1.000 -0.046   -0.101              -0.136  0.053    -0.147 -0.032  0.113
pers_o               0.277  0.167 -0.063 -0.046  1.000    0.117               0.077  0.195     0.501 -0.151 -0.213
rate_day             0.164  0.136 -0.004 -0.101  0.117    1.000               0.261 -0.127     0.063  0.390 -0.182
socdist_single_tile  0.151 -0.014  0.019 -0.136  0.077    0.261               1.000 -0.053    -0.026  0.293 -0.434
women               -0.050  0.037 -0.002  0.053  0.195   -0.127              -0.053  1.000     0.335 -0.404 -0.107
academics            0.196  0.233  0.027 -0.147  0.501    0.063              -0.026  0.335     1.000 -0.407 -0.177
cdu                  0.090  0.024 -0.017 -0.032 -0.151    0.390               0.293 -0.404    -0.407  1.000 -0.341
afd                 -0.152  0.008  0.132  0.113 -0.213   -0.182              -0.434 -0.107    -0.177 -0.341  1.000
hospital_beds       -0.037  0.004 -0.157  0.111  0.281   -0.055              -0.257  0.456     0.383 -0.309  0.011
tourism_beds        -0.131 -0.104 -0.053  0.034 -0.099   -0.076               0.002  0.003    -0.167  0.210 -0.044
gdp                  0.141  0.044 -0.052 -0.117  0.343    0.150              -0.015  0.060     0.534 -0.093 -0.223
manufact            -0.042 -0.040 -0.042  0.003 -0.073    0.196              -0.100 -0.313    -0.145  0.229  0.120
airport             -0.188 -0.167 -0.112  0.171 -0.222    0.022              -0.130 -0.208    -0.416  0.277  0.190
age                 -0.289 -0.080  0.125  0.178 -0.368   -0.301              -0.242  0.149    -0.492 -0.167  0.583
popdens              0.127 -0.025 -0.084 -0.007  0.415   -0.044              -0.002  0.350     0.620 -0.473 -0.211
                    hospital_beds tourism_beds    gdp manufact airport    age popdens
pers_e                     -0.037       -0.131  0.141   -0.042  -0.188 -0.289   0.127
pers_a                      0.004       -0.104  0.044   -0.040  -0.167 -0.080  -0.025
pers_c                     -0.157       -0.053 -0.052   -0.042  -0.112  0.125  -0.084
pers_n                      0.111        0.034 -0.117    0.003   0.171  0.178  -0.007
pers_o                      0.281       -0.099  0.343   -0.073  -0.222 -0.368   0.415
rate_day                   -0.055       -0.076  0.150    0.196   0.022 -0.301  -0.044
socdist_single_tile        -0.257        0.002 -0.015   -0.100  -0.130 -0.242  -0.002
women                       0.456        0.003  0.060   -0.313  -0.208  0.149   0.350
academics                   0.383       -0.167  0.534   -0.145  -0.416 -0.492   0.620
cdu                        -0.309        0.210 -0.093    0.229   0.277 -0.167  -0.473
afd                         0.011       -0.044 -0.223    0.120   0.190  0.583  -0.211
hospital_beds               1.000       -0.039  0.400    0.022  -0.046 -0.107   0.390
tourism_beds               -0.039        1.000 -0.113   -0.102   0.357  0.198  -0.225
gdp                         0.400       -0.113  1.000    0.546  -0.165 -0.483   0.474
manufact                    0.022       -0.102  0.546    1.000   0.169 -0.063  -0.137
airport                    -0.046        0.357 -0.165    0.169   1.000  0.316  -0.438
age                        -0.107        0.198 -0.483   -0.063   0.316  1.000  -0.461
popdens                     0.390       -0.225  0.474   -0.137  -0.438 -0.461   1.000
 

Modelling

Prepare functions


# function calculates all relevant models
run_models <- function(y, lvl1_x, lvl2_x, lvl2_id, data, ctrls=F){

  # subset data
  data = data %>% 
    dplyr::select(all_of(y), all_of(lvl1_x), all_of(lvl2_x), all_of(lvl2_id), 
                  popdens, rate_day)
  data = data %>% 
    dplyr::rename(y = all_of(y),
           lvl1_x = all_of(lvl1_x),
           lvl2_x = all_of(lvl2_x),
           lvl2_id = all_of(lvl2_id)
           )
  
  # configure optimization procedure
  ctrl_config <- lmeControl(opt = 'optim', maxIter = 100, msMaxIter = 100)

  # baseline
  baseline <- lme(fixed = y ~ 1, random = ~ 1 | lvl2_id, 
                    data = data,
                    correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # random intercept fixed slope
  random_intercept <- lme(fixed = y ~ lvl1_x + lvl2_x, 
                          random = ~ 1 | lvl2_id,
                            data = data,
                            correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # random intercept random slope
  random_slope <- lme(fixed = y ~ lvl1_x + lvl2_x, 
                      random = ~ lvl1_x | lvl2_id, 
                        data = data,
                        correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # cross level interaction
  interaction <- lme(fixed = y ~ lvl1_x * lvl2_x, 
                     random = ~ lvl1_x | lvl2_id, 
                       data = data,
                       correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')
  
  # create list with results
  results <- list('baseline' = baseline, 
                  "random_intercept" = random_intercept, 
                  "random_slope" = random_slope,
                  "interaction" = interaction)
  
  
  if (ctrls == 'dem' | ctrls == 'prev'){
    
    # random intercept random slope
    random_slope_ctrl_dem <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens,
                              random = ~ lvl1_x | lvl2_id, 
                          data = data,
                          correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
    # cross level interaction
    interaction_ctrl_main_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
    # cross level interaction
    interaction_ctrl_int_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')        
    
    # create list with results
    results <- list('baseline' = baseline, 
                    "random_intercept" = random_intercept, 
                    "random_slope" = random_slope,
                    "interaction" = interaction,
                    "random_slope_ctrl_dem" = random_slope_ctrl_dem,
                    "interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
                    "interaction_ctrl_int_dem" = interaction_ctrl_int_dem)
  }
  
  if (ctrls == 'prev'){
  
    # random intercept random slope
    random_slope_ctrl_prev <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens + rate_day,
                              random = ~ lvl1_x + rate_day | lvl2_id, 
                          data = data,
                          correlation = corAR1(),
                          control = ctrl_config,
                  method = 'ML')  
    
        # cross level interaction
    interaction_ctrl_main_prev <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens + rate_day,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
  
    # cross level interaction
    interaction_ctrl_int_prev<- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens + rate_day,
                             random = ~ lvl1_x + rate_day | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                          control = ctrl_config,
                  method = 'ML')
  
    # create list with results
    results <- list('baseline' = baseline, 
                    "random_intercept" = random_intercept, 
                    "random_slope" = random_slope,
                    "interaction" = interaction,
                    "random_slope_ctrl_dem" = random_slope_ctrl_dem,
                    "interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
                    "interaction_ctrl_int_dem" = interaction_ctrl_int_dem,                    
                    "random_slope_ctrl_prev" = random_slope_ctrl_prev,
                    "interaction_ctrl_main_prev" = interaction_ctrl_main_prev,
                    "interaction_ctrl_int_prev" = interaction_ctrl_int_prev)
  }
  
  if(ctrls == 'exp'){
    # random intercept random slope
  random_slope_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) + lvl2_x, 
                      random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id, 
                        data = data,
                        correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # cross level interaction
  interaction_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) * lvl2_x, 
                     random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id, 
                       data = data,
                       correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')  
  
  
  # create list with results
  results <- list('baseline' = baseline, 
                  "random_intercept" = random_intercept, 
                  "random_slope" = random_slope,
                  "interaction" = interaction,                  
                  "random_slope_exp" = random_slope_exp,
                  "interaction_exp" = interaction_exp)
  }
  
  return(results)
        
}

# extracts table with coefficients and tests statistics
extract_results <- function(models) {
  
  models_summary <- models %>% 
  map(summary) %>% 
  map("tTable") %>% 
  map(as.data.frame) %>% 
  map(round, 10) 
  # %>% map(~ .[str_detect(rownames(.), 'Inter|lvl'),])
  
  return(models_summary)
  
}


compare_models <- function(models) {

  mdl_names <- models %>% names()
  
  str = ''
  for (i in mdl_names){
    
    mdl_str <- paste('models$', i, sep = '')
    
    if(i == 'baseline'){
      str <- mdl_str
    }else{
    str <- paste(str, mdl_str, sep=', ')
    }
  }
  
  anova_str <- paste0('anova(', str, ')')
  mdl_comp <- eval(parse(text=anova_str))
  rownames(mdl_comp) = mdl_names
  return(mdl_comp)
}

Rescale Data


lvl2_scaled <- df_ger %>% 
  dplyr::select(-time, -rate_day, -socdist_single_tile, -date) %>% 
  distinct() %>% 
  mutate_at(vars(-kreis), scale)
  
lvl1_scaled <- df_ger %>% select(kreis, time, rate_day, socdist_single_tile) %>% 
  mutate_at(vars(-kreis, -time), scale)

df_ger_scaled <- plyr::join(lvl1_scaled, lvl2_scaled, by = 'kreis')

head(df_ger_scaled)
NA
NA

Predict prevalence

prevalence ~ openness


models_o_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_o', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_o_covid)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem
compare_models(models_o_covid)
NA

prevalence ~ conscientiousness


models_c_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_c_covid)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem
compare_models(models_c_covid)
NA
NA

prevalence ~ extraversion


models_e_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_e_covid)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem
compare_models(models_e_covid)
NA
NA

prevalence ~ agreeableness


models_a_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_a_covid)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem
compare_models(models_a_covid)
NA
NA

prevalence ~ neuroticism


models_n_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_n_covid)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem
compare_models(models_n_covid)
NA
NA

Predict social distancing

social distancing ~ openness


models_o_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_o', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_o_socdist)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem

$random_slope_ctrl_prev

$interaction_ctrl_main_prev

$interaction_ctrl_int_prev
compare_models(models_o_socdist)
NA

social distancing ~ conscientiousness


models_c_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_c_socdist)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem

$random_slope_ctrl_prev

$interaction_ctrl_main_prev

$interaction_ctrl_int_prev
compare_models(models_c_socdist)
NA
NA
NA

social distancing ~ extraversion


models_e_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_e_socdist)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem

$random_slope_ctrl_prev

$interaction_ctrl_main_prev

$interaction_ctrl_int_prev
compare_models(models_e_socdist)
NA
NA
NA

social distancing ~ agreeableness


models_a_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_a_socdist)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem

$random_slope_ctrl_prev

$interaction_ctrl_main_prev

$interaction_ctrl_int_prev
compare_models(models_a_socdist)
NA
NA
NA

social distancing ~ neuroticism


models_n_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_n_socdist)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_ctrl_dem

$interaction_ctrl_main_dem

$interaction_ctrl_int_dem

$random_slope_ctrl_prev

$interaction_ctrl_main_prev

$interaction_ctrl_int_prev
compare_models(models_n_socdist)
NA
NA

prevalence ~ conscientiousness


models_c_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_c_covid_exp)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_exp

$interaction_exp
compare_models(models_c_covid_exp)
NA

prevalence ~ extraversion


models_e_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_e_covid_exp)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_exp

$interaction_exp
compare_models(models_e_covid_exp)
NA

prevalence ~ agreeableness


models_a_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_a_covid_exp)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_exp

$interaction_exp
compare_models(models_a_covid_exp)
NA

prevalence ~ neuroticism


models_n_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_n_covid_exp)
$baseline

$random_intercept

$random_slope

$interaction

$random_slope_exp

$interaction_exp
compare_models(models_n_covid_exp)
NA

Create overview table

Define function to create overview tables


summary_table <- function(models, dv_name, prev=F){

  temp_df_ctrl_main <- NULL
  temp_df_ctrl_int <- NULL
  temp_df_ctrl_int_prev <- NULL
  
  for (i in models){
    results <- i %>% extract_results()
    
    results_ctrl_main <- results$interaction_ctrl_main_dem['lvl1_x:lvl2_x',]
    temp_df_ctrl_main <- temp_df_ctrl_main %>% rbind(results_ctrl_main)
    
    results_ctrl_int <- results$interaction_ctrl_int_dem['lvl1_x:lvl2_x',]
    temp_df_ctrl_int <- temp_df_ctrl_int %>% rbind(results_ctrl_int)
    
    if(prev){
      results_ctrl_int_prev <- results$interaction_ctrl_int_prev['lvl1_x:lvl2_x',]
      temp_df_ctrl_int_prev <- temp_df_ctrl_int_prev %>% rbind(results_ctrl_int_prev)
    }
        
  }
  
  names_ctrl_main <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens')
  rownames(temp_df_ctrl_main) <- names_ctrl_main

  names_ctrl_int <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens*time')
  rownames(temp_df_ctrl_int) <- names_ctrl_int

  if(prev){
    names_ctrl_int_prev <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens*time_prev')
    rownames(temp_df_ctrl_int_prev) <- names_ctrl_int_prev
    
    sum_tab <- rbind(temp_df_ctrl_main, temp_df_ctrl_int, temp_df_ctrl_int_prev) %>% round(4)
  }else{
    sum_tab <- rbind(temp_df_ctrl_main, temp_df_ctrl_int) %>% round(4)
  }


  
  return(sum_tab)

} 

Create overview tables

# prevalence
models_prev <- list(models_o_covid, 
                    models_c_covid, 
                    models_e_covid, 
                    models_a_covid, 
                    models_n_covid)

sum_tab_prev <- summary_table(models_prev, dv_name = 'prev')

write.table(sum_tab_prev, quote=F)
Value Std.Error DF t-value p-value
prev~o*time_crtl_popdens 0.0072 0.0029 8798 2.4355 0.0149
prev~c*time_crtl_popdens 3e-04 0.003 8798 0.0945 0.9247
prev~e*time_crtl_popdens 0.0099 0.0029 8798 3.4019 7e-04
prev~a*time_crtl_popdens 0.0092 0.0029 8798 3.1338 0.0017
prev~n*time_crtl_popdens -0.0072 0.0029 8798 -2.4642 0.0137
prev~o*time_crtl_popdens*time 0.0099 0.0032 8797 3.0928 0.002
prev~c*time_crtl_popdens*time 1e-04 0.003 8797 0.0219 0.9825
prev~e*time_crtl_popdens*time 0.0104 0.0029 8797 3.5486 4e-04
prev~a*time_crtl_popdens*time 0.0091 0.0029 8797 3.1155 0.0018
prev~n*time_crtl_popdens*time -0.0073 0.0029 8797 -2.4725 0.0134
# social distancing
models_socdist <- list(models_o_socdist, 
                       models_c_socdist, 
                       models_e_socdist, 
                       models_a_socdist, 
                       models_n_socdist)

sum_tab_socdist <- summary_table(models_socdist, dv_name = 'socdist', prev=T)

write.table(sum_tab_socdist, quote=F)
Value Std.Error DF t-value p-value
socdist~o*time_crtl_popdens 0.0059 0.0016 8798 3.7756 2e-04
socdist~c*time_crtl_popdens -2e-04 0.0016 8798 -0.097 0.9227
socdist~e*time_crtl_popdens 0.0038 0.0016 8798 2.4132 0.0158
socdist~a*time_crtl_popdens 0.0022 0.0016 8798 1.4111 0.1582
socdist~n*time_crtl_popdens -0.0021 0.0016 8798 -1.3383 0.1808
socdist~o*time_crtl_popdens*time 0.0044 0.0017 8797 2.5576 0.0106
socdist~c*time_crtl_popdens*time 3e-04 0.0016 8797 0.1967 0.8441
socdist~e*time_crtl_popdens*time 0.0031 0.0016 8797 1.9928 0.0463
socdist~a*time_crtl_popdens*time 0.0023 0.0016 8797 1.5053 0.1323
socdist~n*time_crtl_popdens*time -0.0021 0.0016 8797 -1.3209 0.1866
socdist~o*time_crtl_popdens*time_prev 0.0033 0.0017 8796 1.9363 0.0529
socdist~c*time_crtl_popdens*time_prev 3e-04 0.0015 8796 0.1949 0.8455
socdist~e*time_crtl_popdens*time_prev 0.002 0.0016 8796 1.2797 0.2007
socdist~a*time_crtl_popdens*time_prev 0.0013 0.0015 8796 0.8453 0.398
socdist~n*time_crtl_popdens*time_prev -0.0012 0.0015 8796 -0.8056 0.4205

Conditional random forest analysis

Extract slopes prevalence


# slope prevalence
df_ger_slope_prev <- df_ger %>% split(.$kreis) %>% 
  map(~ lm(rate_day ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('kreis') %>% 
  rename(slope_prev = '.')

# merge with control variables 
df_ger_slope_prev <- df_ger %>% select(-time, -date, -rate_day, -socdist_single_tile) %>%
  distinct() %>% 
  inner_join(df_ger_slope_prev, by = 'kreis') %>%
  drop_na()

df_ger_slope_prev
NA

Extract slopes social distancing


# slope socdist
df_ger_slope_socdist <- df_ger %>% split(.$kreis) %>%
  map(~ lm(socdist_single_tile ~ time, data = .)) %>%
  map(coef) %>%
  map_dbl('time') %>%
  as.data.frame() %>%
  rownames_to_column('kreis') %>%
  rename(slope_socdist = '.')


# merge with control variables 
df_ger_slope_socdist <- df_ger %>% 
  select(-time, -date, -socdist_single_tile, -rate_day) %>%
  distinct() %>%
  inner_join(df_ger_slope_socdist, by = 'kreis') %>%
  drop_na()

df_ger_slope_socdist
NA

Explore distribution of slopes


df_ger_slope_prev %>% ggplot(aes(slope_prev)) + geom_histogram(bins = 100)


df_ger_slope_socdist %>% ggplot(aes(slope_socdist)) + geom_histogram(bins = 100)

NA
NA

CRF prevalence ~ openness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_o_fit_prev <- cforest(slope_prev ~ pers_o + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_o_varimp_prev <- varimp(crf_o_fit_prev, nperm = 1)
crf_o_varimp_cond_prev <- varimp(crf_o_fit_prev, conditional = T, nperm = 1)

crf_o_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_o_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') +
  theme(axis.text.x = element_text(angle = 90))

CRF prevalence ~ conscientiousness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_c_fit_prev <- cforest(slope_prev ~ pers_c + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_c_varimp_prev <- varimp(crf_c_fit_prev, nperm = 1)
crf_c_varimp_cond_prev <- varimp(crf_c_fit_prev, conditional = T, nperm = 1)

crf_c_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_c_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF prevalence ~ extraversion


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_e_fit_prev <- cforest(slope_prev ~ pers_e + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_e_varimp_prev <- varimp(crf_e_fit_prev, nperm = 1)
crf_e_varimp_cond_prev <- varimp(crf_e_fit_prev, conditional = T, nperm = 1)

crf_e_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_e_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF prevalence ~ agreeableness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_a_fit_prev <- cforest(slope_prev ~ pers_a + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_a_varimp_prev <- varimp(crf_a_fit_prev, nperm = 1)
crf_a_varimp_cond_prev <- varimp(crf_a_fit_prev, conditional = T, nperm = 1)

crf_a_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_a_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF prevalence ~ neuroticism


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_n_fit_prev <- cforest(slope_prev ~ pers_n + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_n_varimp_prev <- varimp(crf_n_fit_prev, nperm = 1)
crf_n_varimp_cond_prev <- varimp(crf_n_fit_prev, conditional = T, nperm = 1)

crf_n_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_n_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF social distancing ~ openness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_o_fit_socdist <- cforest(slope_socdist ~ pers_o + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_o_varimp_socdist <- varimp(crf_o_fit_socdist, nperm = 1)
crf_o_varimp_cond_socdist <- varimp(crf_o_fit_socdist, conditional = T, nperm = 1)

crf_o_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_o_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF social distancing ~ conscientiousness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_c_fit_socdist <- cforest(slope_socdist ~ pers_c + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_c_varimp_socdist <- varimp(crf_c_fit_socdist, nperm = 1)
crf_c_varimp_cond_socdist <- varimp(crf_c_fit_socdist, conditional = T, nperm = 1)

crf_c_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_c_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF social distancing ~ extraversion


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_e_fit_socdist <- cforest(slope_socdist ~ pers_e + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_e_varimp_socdist <- varimp(crf_e_fit_socdist, nperm = 1)
crf_e_varimp_cond_socdist <- varimp(crf_e_fit_socdist, conditional = T, nperm = 1)

crf_e_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_e_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF social distancing ~ agreeableness


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_a_fit_socdist <- cforest(slope_socdist ~ pers_a + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_a_varimp_socdist <- varimp(crf_a_fit_socdist, nperm = 1)
crf_a_varimp_cond_socdist <- varimp(crf_a_fit_socdist, conditional = T, nperm = 1)

crf_a_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_a_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

CRF social distancing ~ neuroticism


ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_n_fit_socdist <- cforest(slope_socdist ~ pers_n + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_n_varimp_socdist <- varimp(crf_n_fit_socdist, nperm = 1)
crf_n_varimp_cond_socdist <- varimp(crf_n_fit_socdist, conditional = T, nperm = 1)

crf_n_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))


crf_n_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

Change point analysis

Preparation


# keep only counties with full data
kreis_complete <- df_ger_scaled %>% 
  group_by(kreis) %>% 
  summarize(n = n()) %>% 
  filter(n==max(.$n)) %>% 
  .$kreis

Prevalence

df_ger_cpt_prev %>% drop_na() %>% dim()
[1] 392  21

Social distancing

df_ger_cpt_prev_socdist %>% drop_na() %>% dim()
[1] 392  24

for(i in head(df_ger_socdist_cpt_results,5)){
  plot(i)
}

NA

Predicting change points

Linear models predicting change points (no controls)

lm_cpt_socdist_pers %>% summary()

Call:
lm(formula = cpt_day_socdist ~ pers_o + pers_c + pers_e + pers_a + 
    pers_n, data = df_ger_cpt_prev_socdist)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.2799 -1.0828 -0.7745  1.5529  4.7123 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 18.98750    0.07978 238.009  < 2e-16 ***
pers_o      -0.27313    0.08540  -3.198  0.00149 ** 
pers_c       0.10251    0.09059   1.132  0.25848    
pers_e       0.29762    0.09103   3.269  0.00117 ** 
pers_a       0.03447    0.09054   0.381  0.70365    
pers_n       0.22114    0.09398   2.353  0.01911 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.596 on 394 degrees of freedom
Multiple R-squared:  0.05102,   Adjusted R-squared:  0.03898 
F-statistic: 4.237 on 5 and 394 DF,  p-value: 0.0009179

Linear models predicting change points with controls

lm_cpt_socdist_pers %>% summary()

Call:
lm(formula = cpt_day_socdist ~ pers_o + pers_c + pers_e + pers_a + 
    pers_n + women + academics + hospital_beds + gdp + manufact + 
    airport + age + popdens, data = df_ger_cpt_prev_socdist)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9228 -1.0471 -0.5154  1.1130  4.2457 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   18.971592   0.074741 253.832  < 2e-16 ***
pers_o         0.005067   0.092110   0.055 0.956163    
pers_c         0.147269   0.089162   1.652 0.099427 .  
pers_e         0.313019   0.087348   3.584 0.000383 ***
pers_a         0.059518   0.089483   0.665 0.506368    
pers_n         0.112731   0.090458   1.246 0.213453    
women         -0.203951   0.100832  -2.023 0.043810 *  
academics     -0.215820   0.137713  -1.567 0.117910    
hospital_beds  0.217942   0.097882   2.227 0.026564 *  
gdp           -0.341968   0.162985  -2.098 0.036555 *  
manufact       0.387349   0.127592   3.036 0.002565 ** 
airport        0.244575   0.089491   2.733 0.006572 ** 
age            0.011052   0.109541   0.101 0.919687    
popdens        0.009737   0.118019   0.083 0.934290    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.478 on 378 degrees of freedom
  (8 observations deleted due to missingness)
Multiple R-squared:  0.1979,    Adjusted R-squared:  0.1704 
F-statistic: 7.176 on 13 and 378 DF,  p-value: 1.515e-12
---
title: "COVID19 GER"
author: "Heinrich Peters"
date: "4/23/2020"
output: html_notebook
---


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# MAC
 knitr::opts_knit$set(root.dir = '/Users/hp2500/Google Drive/STUDY/Columbia/Research/Corona/Data/GER')
 
library(lmerTest)
library(nlme)
library(psych)
library(ggplot2)
library(dplyr)
library(tidyverse)
library(party)

```

# Prepare data

### Read and format data
```{r message=FALSE}

df_ger_covid <- read_csv('Germany_timeseries_prep.csv')
df_ger_socdist <- read_csv('Germany_socdist_fb_kreis.csv')
df_ger_ctrl <- read.csv2('Germany_controls.csv', sep = ';', dec=',')

```


```{r}

# prevalence 
df_ger_covid <- df_ger_covid %>% mutate(date = as.Date(date, "%d%b%Y"),
                                  kreis = as.character(kreis)) %>% 
  dplyr::select(-runday, -kreis_name, -ewz, -shape__area, 
                -cumcase, -anzahlfall, -popdens) %>%
  dplyr::rename(pers_o = open, 
         pers_c = sci,
         pers_e = extra,
         pers_a = agree,
         pers_n = neuro)

df_ger_covid
```


```{r}

# social distancing
df_ger_socdist <- df_ger_socdist %>% 
  rename(socdist_single_tile = all_day_ratio_single_tile_users) %>%
  select(kreis, date, socdist_single_tile) %>% 
  mutate(kreis = as.character(kreis))

df_ger_socdist

```


```{r}
# controls 
df_ger_ctrl <- df_ger_ctrl %>% select(-kreis_nme) %>%
    mutate(kreis = as.character(kreis),
           popdens = popdens %>% 
             as.character() %>%
             str_replace('\\.', '')%>%
             as.numeric())

df_ger_ctrl


```


```{r}

# merge
df_ger <- df_ger_covid %>% 
  inner_join(df_ger_socdist, by = c('kreis', 'date')) %>% 
  inner_join(df_ger_ctrl, by = 'kreis')

# keep only counties with full data
kreis_complete <- df_ger %>% 
  group_by(kreis) %>% 
  summarize(n = n()) %>% 
  filter(n==max(.$n)) %>% 
  .$kreis

df_ger <- df_ger %>% filter(kreis %in% kreis_complete)

# create sequence of dates
date_sequence <- seq.Date(min(df_ger$date),
                          max(df_ger$date), 1)
                     
# create data frame with time sequence
df_dates = tibble(date_sequence, 1:length(date_sequence)) 
names(df_dates) <- c('date', 'time')

# merge day index with gps data
df_ger = df_ger %>% 
  merge(df_dates, by='date') %>% 
  arrange(kreis) %>%
  as_tibble()

```


# Explore data

### Plot prevalence over time
```{r}

df_ger %>% ggplot(aes(x=time, y=rate_day)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall prevalence over time")

pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=rate_day)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}

```


### Plot social distancing (single tile) over time
```{r}

df_ger %>% ggplot(aes(x=time, y=socdist_single_tile)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall social distancing (single tile) over time")

pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=socdist_single_tile)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}

```

### Control for weekend effect 

```{r}

df_ger_loess <- df_ger %>% 
  mutate(weekday = format(date, '%u')) %>% 
  filter(!weekday %in% c('6','7')) %>% 
  split(.$kreis) %>%
  map(~ loess(socdist_single_tile ~ time, data = .)) %>%
  map(predict, 1:max(df_ger$time)) %>% 
  bind_rows() %>% 
  gather(key = 'kreis', value = 'loess') %>% 
  group_by(kreis) %>% 
  mutate(time = row_number())

df_ger_loess

df_ger <- df_ger %>% merge(df_ger_loess, by=c('kreis', 'time')) %>% 
  mutate(weekday = format(date, '%u')) %>% 
  mutate(socdist_single_tile_clean = ifelse(weekday %in% c('6','7'), loess,
                                            socdist_single_tile)) %>%
  arrange(kreis, time) %>% 
  select(-weekday)

df_ger %>% drop_na()

```

```{r}

df_ger %>% ggplot(aes(x=time, y=socdist_single_tile_clean)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  theme(legend.position="none") +
  ggtitle("Overall social distancing (single tile) over time")

pers <- c('pers_o', 'pers_c', 'pers_e', 'pers_a', 'pers_n')

for (i in pers){

gg <- df_ger %>% mutate(prev_tail = cut(.[[i]], 
      breaks = c(-Inf, quantile(.[[i]], 0.2), quantile(.[[i]], 0.8), Inf),
      labels = c('lower tail', 'center', 'upper tail'))) %>% 
  filter(prev_tail != 'center') %>%
  ggplot(aes(x=time, y=socdist_single_tile_clean)) + 
  geom_point(aes(col=kreis, size=popdens)) + 
  geom_smooth(method="loess", se=T) + 
  facet_wrap(~prev_tail) + 
  theme(legend.position="none") +
  ggtitle(i)

print(gg)
}

```


```{r}

df_ger <- df_ger %>% mutate(socdist_single_tile = socdist_single_tile_clean) %>% 
  select(-loess, -socdist_single_tile_clean)

```


### Correlations
```{r}

df_ger %>% group_by(kreis) %>% 
  summarize_if(is.numeric, mean) %>% 
  select(-kreis, -time) %>% 
  cor(use='pairwise.complete') %>% 
  round(3)
 
```

# Modelling 
## Prepare functions

```{r}

# function calculates all relevant models
run_models <- function(y, lvl1_x, lvl2_x, lvl2_id, data, ctrls=F){

  # subset data
  data = data %>% 
    dplyr::select(all_of(y), all_of(lvl1_x), all_of(lvl2_x), all_of(lvl2_id), 
                  popdens, rate_day)
  data = data %>% 
    dplyr::rename(y = all_of(y),
           lvl1_x = all_of(lvl1_x),
           lvl2_x = all_of(lvl2_x),
           lvl2_id = all_of(lvl2_id)
           )
  
  # configure optimization procedure
  ctrl_config <- lmeControl(opt = 'optim', maxIter = 100, msMaxIter = 100)

  # baseline
  baseline <- lme(fixed = y ~ 1, random = ~ 1 | lvl2_id, 
                    data = data,
                    correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # random intercept fixed slope
  random_intercept <- lme(fixed = y ~ lvl1_x + lvl2_x, 
                          random = ~ 1 | lvl2_id,
                            data = data,
                            correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # random intercept random slope
  random_slope <- lme(fixed = y ~ lvl1_x + lvl2_x, 
                      random = ~ lvl1_x | lvl2_id, 
                        data = data,
                        correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # cross level interaction
  interaction <- lme(fixed = y ~ lvl1_x * lvl2_x, 
                     random = ~ lvl1_x | lvl2_id, 
                       data = data,
                       correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')
  
  # create list with results
  results <- list('baseline' = baseline, 
                  "random_intercept" = random_intercept, 
                  "random_slope" = random_slope,
                  "interaction" = interaction)
  
  
  if (ctrls == 'dem' | ctrls == 'prev'){
    
    # random intercept random slope
    random_slope_ctrl_dem <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens,
                              random = ~ lvl1_x | lvl2_id, 
                          data = data,
                          correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
    # cross level interaction
    interaction_ctrl_main_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
    # cross level interaction
    interaction_ctrl_int_dem <- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')        
    
    # create list with results
    results <- list('baseline' = baseline, 
                    "random_intercept" = random_intercept, 
                    "random_slope" = random_slope,
                    "interaction" = interaction,
                    "random_slope_ctrl_dem" = random_slope_ctrl_dem,
                    "interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
                    "interaction_ctrl_int_dem" = interaction_ctrl_int_dem)
  }
  
  if (ctrls == 'prev'){
  
    # random intercept random slope
    random_slope_ctrl_prev <- lme(fixed = y ~ lvl1_x + lvl2_x + popdens + rate_day,
                              random = ~ lvl1_x + rate_day | lvl2_id, 
                          data = data,
                          correlation = corAR1(),
                          control = ctrl_config,
                  method = 'ML')  
    
        # cross level interaction
    interaction_ctrl_main_prev <- lme(fixed = y ~ lvl1_x * lvl2_x + popdens + rate_day,
                             random = ~ lvl1_x | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                    control = ctrl_config,
                  method = 'ML')
  
  
    # cross level interaction
    interaction_ctrl_int_prev<- lme(fixed = y ~ lvl1_x * lvl2_x + lvl1_x * popdens + rate_day,
                             random = ~ lvl1_x + rate_day | lvl2_id, 
                         data = data,
                         correlation = corAR1(),
                          control = ctrl_config,
                  method = 'ML')
  
    # create list with results
    results <- list('baseline' = baseline, 
                    "random_intercept" = random_intercept, 
                    "random_slope" = random_slope,
                    "interaction" = interaction,
                    "random_slope_ctrl_dem" = random_slope_ctrl_dem,
                    "interaction_ctrl_main_dem" = interaction_ctrl_main_dem,
                    "interaction_ctrl_int_dem" = interaction_ctrl_int_dem,                    
                    "random_slope_ctrl_prev" = random_slope_ctrl_prev,
                    "interaction_ctrl_main_prev" = interaction_ctrl_main_prev,
                    "interaction_ctrl_int_prev" = interaction_ctrl_int_prev)
  }
  
  if(ctrls == 'exp'){
    # random intercept random slope
  random_slope_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) + lvl2_x, 
                      random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id, 
                        data = data,
                        correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')

  # cross level interaction
  interaction_exp <- lme(fixed = y ~ (lvl1_x + I(lvl1_x^2)) * lvl2_x, 
                     random = ~ (lvl1_x + I(lvl1_x^2)) | lvl2_id, 
                       data = data,
                       correlation = corAR1(),
                  control = ctrl_config,
                  method = 'ML')  
  
  
  # create list with results
  results <- list('baseline' = baseline, 
                  "random_intercept" = random_intercept, 
                  "random_slope" = random_slope,
                  "interaction" = interaction,                  
                  "random_slope_exp" = random_slope_exp,
                  "interaction_exp" = interaction_exp)
  }
  
  return(results)
        
}

# extracts table with coefficients and tests statistics
extract_results <- function(models) {
  
  models_summary <- models %>% 
  map(summary) %>% 
  map("tTable") %>% 
  map(as.data.frame) %>% 
  map(round, 10) 
  # %>% map(~ .[str_detect(rownames(.), 'Inter|lvl'),])
  
  return(models_summary)
  
}


compare_models <- function(models) {

  mdl_names <- models %>% names()
  
  str = ''
  for (i in mdl_names){
    
    mdl_str <- paste('models$', i, sep = '')
    
    if(i == 'baseline'){
      str <- mdl_str
    }else{
    str <- paste(str, mdl_str, sep=', ')
    }
  }
  
  anova_str <- paste0('anova(', str, ')')
  mdl_comp <- eval(parse(text=anova_str))
  rownames(mdl_comp) = mdl_names
  return(mdl_comp)
}


```

## Rescale Data
```{r}

lvl2_scaled <- df_ger %>% 
  dplyr::select(-time, -rate_day, -socdist_single_tile, -date) %>% 
  distinct() %>% 
  mutate_at(vars(-kreis), scale)
  
lvl1_scaled <- df_ger %>% select(kreis, time, rate_day, socdist_single_tile) %>% 
  mutate_at(vars(-kreis, -time), scale)

df_ger_scaled <- plyr::join(lvl1_scaled, lvl2_scaled, by = 'kreis')

head(df_ger_scaled)


```



## Predict prevalence
### prevalence ~ openness
```{r}

models_o_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_o', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_o_covid)

compare_models(models_o_covid)

```

### prevalence ~ conscientiousness
```{r}

models_c_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_c_covid)

compare_models(models_c_covid)


```

### prevalence ~ extraversion
```{r}

models_e_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_e_covid)

compare_models(models_e_covid)


```

### prevalence ~ agreeableness
```{r}

models_a_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_a_covid)

compare_models(models_a_covid)


```

### prevalence ~ neuroticism
```{r}

models_n_covid <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'dem')

extract_results(models_n_covid)

compare_models(models_n_covid)


```

## Predict social distancing
### social distancing ~ openness
```{r}

models_o_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_o', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_o_socdist)

compare_models(models_o_socdist)

```

### social distancing ~ conscientiousness
```{r}

models_c_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_c_socdist)

compare_models(models_c_socdist)



```

### social distancing ~ extraversion
```{r}

models_e_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_e_socdist)

compare_models(models_e_socdist)



```

### social distancing ~ agreeableness
```{r}

models_a_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_a_socdist)

compare_models(models_a_socdist)



```

### social distancing ~ neuroticism
```{r}

models_n_socdist <-run_models(y = 'socdist_single_tile', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'prev')

extract_results(models_n_socdist)

compare_models(models_n_socdist)


```

## Explore quadratic trends 

### prevalence ~ openness
```{r}

models_o_covid_exp <-run_models(y = 'rate_day',
                         lvl1_x = 'time',
                         lvl2_x = 'pers_o',
                         lvl2_id = 'kreis',
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_o_covid_exp)

compare_models(models_o_covid_exp)

```


## prevalence ~ conscientiousness
```{r}

models_c_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_c', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_c_covid_exp)

compare_models(models_c_covid_exp)

```

### prevalence ~ extraversion
```{r}

models_e_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_e', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_e_covid_exp)

compare_models(models_e_covid_exp)

```

### prevalence ~ agreeableness
```{r}

models_a_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_a', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_a_covid_exp)

compare_models(models_a_covid_exp)

```

### prevalence ~ neuroticism
```{r}

models_n_covid_exp <-run_models(y = 'rate_day', 
                         lvl1_x = 'time', 
                         lvl2_x = 'pers_n', 
                         lvl2_id = 'kreis', 
                         data = df_ger_scaled,
                         ctrls = 'exp')

extract_results(models_n_covid_exp)

compare_models(models_n_covid_exp)

```

## Create overview table 

### Define function to create overview tables
```{r}

summary_table <- function(models, dv_name, prev=F){

  temp_df_ctrl_main <- NULL
  temp_df_ctrl_int <- NULL
  temp_df_ctrl_int_prev <- NULL
  
  for (i in models){
    results <- i %>% extract_results()
    
    results_ctrl_main <- results$interaction_ctrl_main_dem['lvl1_x:lvl2_x',]
    temp_df_ctrl_main <- temp_df_ctrl_main %>% rbind(results_ctrl_main)
    
    results_ctrl_int <- results$interaction_ctrl_int_dem['lvl1_x:lvl2_x',]
    temp_df_ctrl_int <- temp_df_ctrl_int %>% rbind(results_ctrl_int)
    
    if(prev){
      results_ctrl_int_prev <- results$interaction_ctrl_int_prev['lvl1_x:lvl2_x',]
      temp_df_ctrl_int_prev <- temp_df_ctrl_int_prev %>% rbind(results_ctrl_int_prev)
    }
        
  }
  
  names_ctrl_main <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens')
  rownames(temp_df_ctrl_main) <- names_ctrl_main

  names_ctrl_int <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens*time')
  rownames(temp_df_ctrl_int) <- names_ctrl_int

  if(prev){
    names_ctrl_int_prev <- paste0(dv_name, '~', c('o', 'c', 'e', 'a', 'n'), '*time', '_crtl_popdens*time_prev')
    rownames(temp_df_ctrl_int_prev) <- names_ctrl_int_prev
    
    sum_tab <- rbind(temp_df_ctrl_main, temp_df_ctrl_int, temp_df_ctrl_int_prev) %>% round(4)
  }else{
    sum_tab <- rbind(temp_df_ctrl_main, temp_df_ctrl_int) %>% round(4)
  }


  
  return(sum_tab)

} 

```

### Create overview tables
```{r}
# prevalence
models_prev <- list(models_o_covid, 
                    models_c_covid, 
                    models_e_covid, 
                    models_a_covid, 
                    models_n_covid)

sum_tab_prev <- summary_table(models_prev, dv_name = 'prev')

write.table(sum_tab_prev, quote=F)

# social distancing
models_socdist <- list(models_o_socdist, 
                       models_c_socdist, 
                       models_e_socdist, 
                       models_a_socdist, 
                       models_n_socdist)

sum_tab_socdist <- summary_table(models_socdist, dv_name = 'socdist', prev=T)

write.table(sum_tab_socdist, quote=F)



```

# Conditional random forest analysis 

### Extract slopes prevalence
```{r}

# slope prevalence
df_ger_slope_prev <- df_ger %>% split(.$kreis) %>% 
  map(~ lm(rate_day ~ time, data = .)) %>%
  map(coef) %>% 
  map_dbl('time') %>% 
  as.data.frame() %>% 
  rownames_to_column('kreis') %>% 
  rename(slope_prev = '.')

# merge with control variables 
df_ger_slope_prev <- df_ger %>% select(-time, -date, -rate_day, -socdist_single_tile) %>%
  distinct() %>% 
  inner_join(df_ger_slope_prev, by = 'kreis') %>%
  drop_na()

df_ger_slope_prev

```


### Extract slopes social distancing
```{r}

# slope socdist
df_ger_slope_socdist <- df_ger_scaled %>% split(.$kreis) %>%
  map(~ lm(socdist_single_tile ~ time, data = .)) %>%
  map(coef) %>%
  map_dbl('time') %>%
  as.data.frame() %>%
  rownames_to_column('kreis') %>%
  rename(slope_socdist = '.')


# merge with control variables 
df_ger_slope_socdist <- df_ger_scaled %>% 
  select(-time, -date, -socdist_single_tile, -rate_day) %>%
  distinct() %>%
  inner_join(df_ger_slope_socdist, by = 'kreis') %>%
  drop_na()

df_ger_slope_socdist

```

### Explore distribution of slopes
```{r}

df_ger_slope_prev %>% ggplot(aes(slope_prev)) + geom_histogram(bins = 100)

df_ger_slope_socdist %>% ggplot(aes(slope_socdist)) + geom_histogram(bins = 100)


```

# CRF prevalence ~ openness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_o_fit_prev <- cforest(slope_prev ~ pers_o + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_o_varimp_prev <- varimp(crf_o_fit_prev, nperm = 1)
crf_o_varimp_cond_prev <- varimp(crf_o_fit_prev, conditional = T, nperm = 1)

crf_o_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_o_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') +
  theme(axis.text.x = element_text(angle = 90))

```

# CRF prevalence ~ conscientiousness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_c_fit_prev <- cforest(slope_prev ~ pers_c + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_c_varimp_prev <- varimp(crf_c_fit_prev, nperm = 1)
crf_c_varimp_cond_prev <- varimp(crf_c_fit_prev, conditional = T, nperm = 1)

crf_c_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_c_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```


# CRF prevalence ~ extraversion
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_e_fit_prev <- cforest(slope_prev ~ pers_e + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_e_varimp_prev <- varimp(crf_e_fit_prev, nperm = 1)
crf_e_varimp_cond_prev <- varimp(crf_e_fit_prev, conditional = T, nperm = 1)

crf_e_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_e_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```


# CRF prevalence ~ agreeableness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_a_fit_prev <- cforest(slope_prev ~ pers_a + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_a_varimp_prev <- varimp(crf_a_fit_prev, nperm = 1)
crf_a_varimp_cond_prev <- varimp(crf_a_fit_prev, conditional = T, nperm = 1)

crf_a_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_a_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```


# CRF prevalence ~ neuroticism
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_n_fit_prev <- cforest(slope_prev ~ pers_n + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_prev[-1], 
                         controls = ctrls)

crf_n_varimp_prev <- varimp(crf_n_fit_prev, nperm = 1)
crf_n_varimp_cond_prev <- varimp(crf_n_fit_prev, conditional = T, nperm = 1)

crf_n_varimp_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_n_varimp_cond_prev %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```


# CRF social distancing ~ openness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_o_fit_socdist <- cforest(slope_socdist ~ pers_o + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_o_varimp_socdist <- varimp(crf_o_fit_socdist, nperm = 1)
crf_o_varimp_cond_socdist <- varimp(crf_o_fit_socdist, conditional = T, nperm = 1)

crf_o_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_o_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```

# CRF social distancing ~ conscientiousness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_c_fit_socdist <- cforest(slope_socdist ~ pers_c + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_c_varimp_socdist <- varimp(crf_c_fit_socdist, nperm = 1)
crf_c_varimp_cond_socdist <- varimp(crf_c_fit_socdist, conditional = T, nperm = 1)

crf_c_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_c_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```

# CRF social distancing ~ extraversion
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_e_fit_socdist <- cforest(slope_socdist ~ pers_e + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_e_varimp_socdist <- varimp(crf_e_fit_socdist, nperm = 1)
crf_e_varimp_cond_socdist <- varimp(crf_e_fit_socdist, conditional = T, nperm = 1)

crf_e_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_e_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```

# CRF social distancing ~ agreeableness
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_a_fit_socdist <- cforest(slope_socdist ~ pers_a + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_a_varimp_socdist <- varimp(crf_a_fit_socdist, nperm = 1)
crf_a_varimp_cond_socdist <- varimp(crf_a_fit_socdist, conditional = T, nperm = 1)

crf_a_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_a_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```


# CRF social distancing ~ neuroticism
```{r}

ctrls <- cforest_unbiased(ntree=500, mtry=5)

crf_n_fit_socdist <- cforest(slope_socdist ~ pers_n + women + academics +
                          hospital_beds + gdp + manufact +
                          airport + age + popdens, 
                         df_ger_slope_socdist[-1], 
                         controls = ctrls)

crf_n_varimp_socdist <- varimp(crf_n_fit_socdist, nperm = 1)
crf_n_varimp_cond_socdist <- varimp(crf_n_fit_socdist, conditional = T, nperm = 1)

crf_n_varimp_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

crf_n_varimp_cond_socdist %>% as.data.frame() %>% rownames_to_column('variable') %>%
  ggplot(aes(x=variable, y=.)) +
  geom_bar(stat = 'identity') + 
  theme(axis.text.x = element_text(angle = 90))

```

# Change point analysis
### Preparation
```{r}

# keep only counties with full data
kreis_complete <- df_ger_scaled %>% 
  group_by(kreis) %>% 
  summarize(n = n()) %>% 
  filter(n==max(.$n)) %>% 
  .$kreis
```

### Prevalence
```{r}

# run changepoint analysis
df_ger_prev_cpt_results <- df_ger_scaled %>% select(kreis, rate_day) %>%
  filter(kreis %in% kreis_complete) %>% 
  split(.$kreis) %>%
  map(~ cpt.meanvar(as.vector(.$rate_day),
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1))

# calculate change points
df_ger_prev_cpt_day <- df_ger_prev_cpt_results %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_prev = '.') %>%
  rownames_to_column('kreis')

# calculate mean differences
df_ger_prev_cpt_mean_diff <- df_ger_prev_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ .[2]-.[1]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_prev = '.') %>%
  rownames_to_column('kreis')

# calculate varaince differences
df_ger_prev_cpt_var_diff <- df_ger_prev_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$variance) %>% 
  map(~ .[2]-.[1]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(var_diff_prev = '.') %>%
  rownames_to_column('kreis')

# merge new variables 
df_ger_cpt_prev <- df_ger_scaled %>%
  select(-time, -rate_day, -socdist_single_tile) %>%
  distinct() %>%
  mutate(kreis = as.character(kreis)) %>%
  left_join(df_ger_prev_cpt_day, by='kreis') %>%
  left_join(df_ger_prev_cpt_mean_diff, by='kreis') %>%
  left_join(df_ger_prev_cpt_var_diff, by='kreis')

df_ger_cpt_prev %>% select(cpt_day_prev) %>% map(hist)
df_ger_cpt_prev %>% select(mean_diff_prev) %>% map(hist)
df_ger_cpt_prev %>% select(var_diff_prev) %>% map(hist)

df_ger_cpt_prev %>% dim()
df_ger_cpt_prev %>% drop_na() %>% dim()

```


```{r}

for(i in head(df_ger_prev_cpt_results,5)){
  plot(i)
}

```

### Social distancing
```{r}

# run changepoint analysis
df_ger_socdist_cpt_results <- df_ger_scaled %>% select(kreis, socdist_single_tile) %>%
  filter(kreis %in% kreis_complete) %>% 
  split(.$kreis) %>%
  map(~ cpt.meanvar(as.vector(.$socdist_single_tile),
                    #penalty = 'Asymptotic',
                    class=TRUE,
                    param.estimates=TRUE,
                    Q=1,
                    test.stat = 'Normal'))

# calculate change point
df_ger_socdist_cpt_day <- df_ger_socdist_cpt_results %>% 
  map(cpts) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(cpt_day_socdist = '.') %>%
  rownames_to_column('kreis')

# calculate mean differences
df_ger_socdist_cpt_mean_diff <- df_ger_socdist_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$mean) %>% 
  map(~ .[2]-.[1]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(mean_diff_socdist = '.') %>%
  rownames_to_column('kreis')

# calculate varaince differences
df_ger_socdist_cpt_var_diff <- df_ger_socdist_cpt_results %>% 
  map(param.est) %>% 
  map(~ .$variance) %>% 
  map(~ .[2]-.[1]) %>% 
  unlist() %>% 
  as.data.frame() %>% 
  rename(var_diff_socdist = '.') %>%
  rownames_to_column('kreis')

# merge new variables 
df_ger_cpt_prev_socdist <- df_ger_cpt_prev %>%
  left_join(df_ger_socdist_cpt_day, by='kreis') %>%
  left_join(df_ger_socdist_cpt_mean_diff, by='kreis') %>%
  left_join(df_ger_socdist_cpt_var_diff, by='kreis')

df_ger_cpt_prev_socdist %>% select(cpt_day_socdist) %>% map(hist)
df_ger_cpt_prev_socdist %>% select(mean_diff_socdist) %>% map(hist)
df_ger_cpt_prev_socdist %>% select(var_diff_socdist) %>% map(hist)

df_ger_cpt_prev_socdist %>% dim()
df_ger_cpt_prev_socdist %>% drop_na() %>% dim()

```

```{r}

for(i in head(df_ger_socdist_cpt_results,5)){
  plot(i)
}

```

# Predicting change points 
### Linear models predicting change points (no controls)
```{r}

lm_cpr_prev_pers <- lm(cpt_day_prev ~ pers_o + pers_c + pers_e + pers_a + pers_n, 
                         data = df_ger_cpt_prev_socdist)
lm_cpr_prev_pers %>% summary()


lm_cpt_socdist_pers <- lm(cpt_day_socdist ~ pers_o + pers_c + pers_e + pers_a + pers_n, 
                            data = df_ger_cpt_prev_socdist)
lm_cpt_socdist_pers %>% summary()

```

### Linear models predicting change points with controls
```{r}
df_ger_cpt_prev_socdist

lm_cpt_prev_pers <- lm(cpt_day_prev ~ pers_o + pers_c + pers_e + pers_a + pers_n + 
                         women + academics + hospital_beds + gdp + manufact +
                          airport + age + popdens,
                         data = df_ger_cpt_prev_socdist)
lm_cpt_prev_pers %>% summary()

lm_cpt_socdist_pers <- lm(cpt_day_socdist ~ pers_o + pers_c + pers_e + pers_a + pers_n + 
                            women + academics + hospital_beds + gdp + manufact +
                            airport + age + popdens,
                            data = df_ger_cpt_prev_socdist)
lm_cpt_socdist_pers %>% summary()

```


